#!/bin/bash

## Experiment configs
# Task identifier: used in every output path and passed to each script as task_name.
task_name='sst5'
# Number of in-context examples: passed to the retrievers as num_ice (the BM25
# step asks for one more) and to the inferencer as n_ice_examples.
number_of_ice_examples=50

# Sentence encoder model name.
model_name='bert-base-uncased'
# Handed to the inferencer as pred_dir.
pred_dir="output/${task_name}/results/"

## LLM configs
# Path to the LLM model (given to the inferencer as model_name).
llm_model_path='Qwen2.5-1.5B'
# Number of tokens to use for ICE (given to the inferencer as dataset_reader.n_tokens).
n_tokens=1600
# Batch size for inference.
batch_size=1

## Dataset subset configs
# These four values are forwarded verbatim as the *.ds_size / *.ds_segment
# overrides. NOTE(review): "null" presumably disables subsetting, and a segment
# index k presumably selects the k-th chunk of ds_size examples — confirm
# against the Python readers.
example_bank_size=null      # size of the example bank (index_reader.ds_size)
example_bank_segment=null   # example bank segment (index_reader.ds_segment)
test_set_size=null          # size of the test set (dataset_reader.ds_size)
test_set_segment=null       # test set segment (dataset_reader.ds_segment)


# Create the per-task output directory and its results/ subdirectory in one call.
mkdir -p "output/${task_name}/" "output/${task_name}/results/"

printf '%s\n' "Running for task: ${task_name}"

printf '%s\n' "-------------------------------------------------- BM25 retriever --------------------------------------------------------"
# Where the BM25 step writes its retrieval results; evaluated later in the script.
output_file_1="output/${task_name}/bm25_retriever_${task_name}.jsonl"

# Overrides for bm25_retriever.py. Note that num_ice is one more than
# number_of_ice_examples for this retriever.
retriever_args=(
    "task_name=${task_name}"
    "output_file=${output_file_1}"
    "num_ice=$((number_of_ice_examples + 1))"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
)
python bm25_retriever.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- DPP retriever --------------------------------------------------------"
# Embedding file path handed to the retriever as embedding_npy.
embedding_npy="output/${task_name}/embeddings_${task_name}_${model_name}.npy"
# Where the DPP step writes its retrieval results; evaluated later in the script.
output_file_2="output/${task_name}/dpp_retriever_${task_name}_${model_name}.jsonl"

# Overrides for dpp_retriever.py. Building the argv in an array avoids the
# trailing line continuation the last argument used to carry, which only
# terminated the command because a blank line happened to follow it.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_2}"
    "embedding_npy=${embedding_npy}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
)
python dpp_retriever.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Dense retriever --------------------------------------------------------"
# Embedding file path handed to the retriever as embedding_npy (same path the
# DPP step uses).
embedding_npy="output/${task_name}/embeddings_${task_name}_${model_name}.npy"
# Where the dense step writes its retrieval results; evaluated later in the script.
output_file_3="output/${task_name}/dense_retriever_${task_name}_${model_name}.jsonl"

# Overrides for dense_retriever.py. Building the argv in an array avoids the
# trailing line continuation the last argument used to carry, which only
# terminated the command because a blank line happened to follow it.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_3}"
    "embedding_npy=${embedding_npy}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
)
python dense_retriever.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Submodular retriever bert-base-uncased lambda 0 --------------------------------------------------------"
lambda_val=0
# File-name form of lambda: the first "." (if any) is replaced with "_".
lambda_safe="${lambda_val/./_}"
output_file_4="output/${task_name}/submodular_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_4}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
)
python submodular_retriever_gpu.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Submodular retriever bert-base-uncased lambda 0.5 --------------------------------------------------------"
lambda_val=0.5
# File-name form of lambda: the first "." is replaced with "_" (0.5 -> 0_5).
lambda_safe="${lambda_val/./_}"
output_file_5="output/${task_name}/submodular_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_5}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
)
python submodular_retriever_gpu.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Submodular retriever bert-base-uncased lambda 1 --------------------------------------------------------"
lambda_val=1
# File-name form of lambda: the first "." (if any) is replaced with "_".
lambda_safe="${lambda_val/./_}"
output_file_6="output/${task_name}/submodular_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_6}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
)
python submodular_retriever_gpu.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Kernel retriever bert-base-uncased lambda 0 --------------------------------------------------------"
lambda_val=0
# File-name form of lambda: the first "." (if any) is replaced with "_".
lambda_safe="${lambda_val/./_}"
output_file_7="output/${task_name}/kernel_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_7}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
)
python kernel_retriever_gpu.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Kerel retriever bert-base-uncased lambda 0.5 --------------------------------------------------------"
# This step is the lambda = 0.5 kernel run. It was previously set to 1, which
# made output_file_8 identical to the lambda = 1 step's output path, so the
# 0.5 configuration was never produced and lambda = 1 was evaluated twice.
lambda_val=0.5
# File-name form of lambda: the first "." is replaced with "_" (0.5 -> 0_5).
lambda_safe="${lambda_val/./_}"
output_file_8="output/${task_name}/kernel_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_8}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
)
python kernel_retriever_gpu.py "${retriever_args[@]}"

printf '%s\n' "-------------------------------------------------- Kerel retriever bert-base-uncased lambda 1 --------------------------------------------------------"
lambda_val=1
# File-name form of lambda: the first "." (if any) is replaced with "_".
lambda_safe="${lambda_val/./_}"
output_file_9="output/${task_name}/kernel_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_9}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
)
python kernel_retriever_gpu.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Kernel retriever Poly bert-base-uncased lambda 0 --------------------------------------------------------"
lambda_val=0
# File-name form of lambda: the first "." (if any) is replaced with "_".
lambda_safe="${lambda_val/./_}"
output_file_10="output/${task_name}/kernel_lambda_${lambda_safe}_poly_retriever_${task_name}_${model_name}.json"

# Same script as the plain kernel runs, with use_polynomial_kernel=true added.
# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_10}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
    use_polynomial_kernel=true
)
python kernel_retriever_gpu.py "${retriever_args[@]}"


printf '%s\n' "-------------------------------------------------- Kerel retriever Poly bert-base-uncased lambda 0.5 --------------------------------------------------------"
lambda_val=0.5
# File-name form of lambda: the first "." is replaced with "_" (0.5 -> 0_5).
lambda_safe="${lambda_val/./_}"
output_file_11="output/${task_name}/kernel_lambda_${lambda_safe}_poly_retriever_${task_name}_${model_name}.json"

# Same script as the plain kernel runs, with use_polynomial_kernel=true added.
# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_11}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
    use_polynomial_kernel=true
)
python kernel_retriever_gpu.py "${retriever_args[@]}"

printf '%s\n' "-------------------------------------------------- Kerel retriever Poly bert-base-uncased lambda 1 --------------------------------------------------------"
lambda_val=1
# File-name form of lambda: the first "." (if any) is replaced with "_".
lambda_safe="${lambda_val/./_}"
output_file_12="output/${task_name}/kernel_lambda_${lambda_safe}_poly_retriever_${task_name}_${model_name}.json"

# Same script as the plain kernel runs, with use_polynomial_kernel=true added.
# NOTE(review): faiss_index is not assigned anywhere in this script, so unless
# it is inherited from the environment this passes an empty "faiss_index=" — confirm.
retriever_args=(
    pretrained_model_path=0
    "model_name=${model_name}"
    "task_name=${task_name}"
    "output_file=${output_file_12}"
    "faiss_index=${faiss_index}"
    "index_reader.ds_size=${example_bank_size}"
    "index_reader.ds_segment=${example_bank_segment}"
    "index_reader.task_name=${task_name}"
    "num_ice=${number_of_ice_examples}"
    "lambd=${lambda_val}"
    use_polynomial_kernel=true
)
python kernel_retriever_gpu.py "${retriever_args[@]}"


# echo "-------------------------------------------------- Submodular retriever bert-large-uncased lambda 0 --------------------------------------------------------"
# lambda_val=0
# model_name="bert-large-uncased"
# lambda_safe=${lambda_val/./_}
# output_file_8="output/${task_name}/submodular_bert-large_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# python submodular_retriever_gpu.py \
#         pretrained_model_path=0 \
#         model_name=${model_name} \
#         task_name=${task_name} \
#         output_file=${output_file_8} \
#         faiss_index=${faiss_index} \
#         index_reader.task_name=${task_name} \
#         num_ice=${number_of_ice_examples} \
#         lambd=${lambda_val}


# echo "-------------------------------------------------- Submodular retriever bert-large-uncased lambda 1 --------------------------------------------------------"
# lambda_val=1
# model_name="bert-large-uncased"
# lambda_safe=${lambda_val/./_}
# output_file_9="output/${task_name}/submodular_bert-large_lambda_${lambda_safe}_retriever_${task_name}_${model_name}.json"

# python submodular_retriever_gpu.py \
#         pretrained_model_path=0 \
#         model_name=${model_name} \
#         task_name=${task_name} \
#         output_file=${output_file_9} \
#         faiss_index=${faiss_index} \
#         index_reader.task_name=${task_name} \
#         num_ice=${number_of_ice_examples} \
#         lambd=${lambda_val}

echo "-------------------------------------------------- Evaluation --------------------------------------------------------"

#######################################
# Pull a single accuracy value out of inferencer output.
# Reads text on stdin, keeps only lines containing "metric", collects every
# number that follows 'accuracy': on those lines, and prints the last one.
# Without the final `tail`, output with several metric lines would yield a
# multi-line value instead of one score.
# Outputs: the accuracy on stdout, or nothing when no match is found.
#######################################
extract_accuracy() {
    grep "metric" | grep -oP "'accuracy':\s*\K[0-9.]+" | tail -n 1
}

# One entry per retrieval file, in the order the files are evaluated below.
accuracies=()

for retrieve_file in "$output_file_1" "$output_file_2" "$output_file_3" "$output_file_4" "$output_file_5" "$output_file_6" "$output_file_7" "$output_file_8" "$output_file_9" "$output_file_10" "$output_file_11" "$output_file_12"
do
    echo "Evaluating using retrieval file: $retrieve_file"

    # Overrides for inferencer.py; only dataset_reader.dataset_path varies
    # between iterations.
    inferencer_args=(
        "task_name=${task_name}"
        "dataset_reader.dataset_path=${retrieve_file}"
        "dataset_reader.n_tokens=${n_tokens}"
        "dataset_reader.ds_size=${test_set_size}"
        "dataset_reader.ds_segment=${test_set_segment}"
        "index_reader.ds_size=${example_bank_size}"
        "index_reader.ds_segment=${example_bank_segment}"
        "model_name=${llm_model_path}"
        "batch_size=${batch_size}"
        "n_ice_examples=${number_of_ice_examples}"
        "pred_dir=${pred_dir}"
    )
    # stdout is captured so the score can be parsed, then replayed for the log.
    output=$(python inferencer.py "${inferencer_args[@]}")

    printf '%s\n' "$output"

    # An empty entry is recorded when no accuracy could be parsed (for example
    # when the inferencer failed), which keeps positions aligned with the files.
    acc=$(extract_accuracy <<<"$output")

    accuracies+=("$acc")
done

echo ""
echo "Accumulated Accuracy Scores:"
for acc in "${accuracies[@]}"; do
    echo "$acc"
done


echo "-------------------------------------------------- Finished --------------------------------------------------------"
